# dependencies
library(tidyverse)
library(knitr)
library(kableExtra)
library(boot)
library(parallel)
library(bayestestR)
library(patchwork)
library(mdthemes)
library(lme4)
library(sjPlot)
library(emmeans)
library(ggstance)
library(janitor)
# library(merTools) called via merTools:: to avoid namespace collisions between MASS and dplyr
# set seed for reproducibility
set.seed(42)
# options
options(knitr.table.format = "html") # necessary configuration of tables
# disable scientific notation
options(scipen = 999)
# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
require(janitor)
df %>% mutate_if(is.numeric, janitor::round_half_up, digits = n_digits)
}
# create necessary directories
dir.create("../data/processed")
dir.create("../data/results")
#dir.create("models")
# get data
data_trial_level <- read_csv("../data/raw/data_trial_level.csv") %>%
filter(timepoint == "baseline" & (age >= 18 | is.na(age)))
# outliers
data_outliers <- data_trial_level %>%
distinct(unique_id, .keep_all = TRUE) %>%
select(unique_id, domain, mean_rt) %>%
mutate(median_mean_rt = median(mean_rt, na.rm = TRUE),
mad_mean_rt = mad(mean_rt, na.rm = TRUE)) %>%
# exclude median +- 2MAD
mutate(rt_outlier = ifelse(mean_rt < median_mean_rt-mad_mean_rt*2 |
mean_rt > median_mean_rt+mad_mean_rt*2, TRUE, FALSE)) %>%
filter(rt_outlier == FALSE) %>%
select(unique_id, rt_outlier) %>%
full_join(data_trial_level, by = "unique_id") %>%
mutate(rt_outlier = ifelse(is.na(rt_outlier), TRUE, rt_outlier))
data_trimmed <- data_outliers %>%
filter(rt_outlier == FALSE)
# data with confidence intervals
data_estimates_D <- read_csv("../data/processed/data_estimates_D.csv") %>%
filter(method == "bca")
data_estimates_PI <- read_csv("../data/processed/data_estimates_PI.csv") %>%
filter(method == "bca")data_outliers %>%
distinct(unique_id, .keep_all = TRUE) %>%
count(rt_outlier) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| rt_outlier | n |
|---|---|
| FALSE | 1462 |
| TRUE | 109 |
data_descriptives <- data_outliers %>%
distinct(unique_id, .keep_all = TRUE)
data_descriptives %>%
count(domain) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| domain | n |
|---|---|
| Body image | 21 |
| Clinton-Trump | 101 |
| Countries (1) | 59 |
| Countries (2) | 55 |
| Death (1) | 19 |
| Death (2) | 22 |
| Death (3) | 29 |
| Disgust (1) | 40 |
| Disgust (2) | 44 |
| Friend-Enemy | 101 |
| Gender (1) | 41 |
| Gender (2) | 101 |
| Lincoln-Hitler | 137 |
| Personality - Agreeableness | 40 |
| Personality - Conscientiousness | 39 |
| Personality - Extraversion | 40 |
| Personality - Neuroticism | 41 |
| Personality - Openness | 41 |
| Race (1) | 45 |
| Race (2) | 44 |
| Religion | 31 |
| Rich-Poor | 98 |
| Sexuality (1) | 27 |
| Sexuality (2) | 21 |
| Shapes & colors (1) | 14 |
| Shapes & colors (2) | 12 |
| Shapes & colors (3) | 12 |
| Shapes & colors (4) | 11 |
| Shapes & colors (5) | 25 |
| Shapes & colors (6) | 28 |
| Shapes & colors (7) | 15 |
| Stigma - ADHD | 62 |
| Stigma - PTSD | 57 |
| Stigma - Schizophrenia | 56 |
| Valenced words | 42 |
data_descriptives %>%
count(domain) %>%
summarize(total_n = sum(n),
min_n_per_domain = min(n),
max_n_per_domain = max(n),
mean_n_per_domain = round_half_up(mean(n, na.rm = TRUE), 2),
median_n_per_domain = round_half_up(median(n, na.rm = TRUE), 2),
sd_n_per_domain = round_half_up(sd(n, na.rm = TRUE), 2)) %>%
gather() %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| key | value |
|---|---|
| total_n | 1571.00 |
| min_n_per_domain | 11.00 |
| max_n_per_domain | 137.00 |
| mean_n_per_domain | 44.89 |
| median_n_per_domain | 40.00 |
| sd_n_per_domain | 30.06 |
data_descriptives %>%
summarize(min_age = round_half_up(min(age, na.rm = TRUE), 2),
max_age = round_half_up(max(age, na.rm = TRUE), 2),
mean_age = round_half_up(mean(age, na.rm = TRUE), 2),
sd_age = round_half_up(sd(age, na.rm = TRUE), 2)) %>%
gather() %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| key | value |
|---|---|
| min_age | 18.00 |
| max_age | 60.00 |
| mean_age | 20.09 |
| sd_age | 4.28 |
data_descriptives %>%
count(gender) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| gender | n |
|---|---|
| Female | 236 |
| Male | 141 |
| Other | 1 |
| NA | 1193 |
In order to convey how small these effects are and what they’re estimated from for a given participant.
data_trimmed %>%
mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4")) %>%
ggplot(aes(rt, fill = block_type)) +
geom_density(alpha = 0.3) +
facet_wrap(~ trial_type, ncol = 1) +
mdthemes::md_theme_linedraw() +
scale_fill_viridis_d(begin = 0.3, end = 0.7) +
ylab("Frequency") +
xlab("Reaction time (ms)") +
labs(fill = "Block type")# data_trimmed %>%
# group_by(unique_id, trial_type) %>%
# summarize(mean_rt_con = mean(rt[block_type == "con"], na.rm = TRUE),
# mean_rt_incon = mean(rt[block_type == "incon"], na.rm = TRUE),
# sd_rt_con = sd(rt[block_type == "con"], na.rm = TRUE),
# sd_rt_incon = sd(rt[block_type == "con"], na.rm = TRUE),
# sd_rt = sd(rt, na.rm = TRUE),
# .groups = "drop") %>%
# mutate(diff_mean_rt = mean_rt_incon - mean_rt_con) %>%
# select(-unique_id) %>%
# group_by(trial_type) %>%
# summarize_all(median) %>%
# round_df(0) %>%
# select(trial_type,
# median_mean_rt_con = mean_rt_con,
# median_mean_rt_incon = mean_rt_incon,
# median_diff_mean_rt = diff_mean_rt,
# median_sd_rt_con = sd_rt_con,
# median_sd_rt_incon = sd_rt_incon,
# median_sd_rt = sd_rt) %>%
# kable() %>%
# kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)data_eg_1 <- data_trimmed %>%
filter(unique_id == "ian death emma_43" & trial_type == "tt1")
#filter(unique_id == "ian death emma_15" & trial_type == "tt1")
#filter(unique_id == "ian death june_57" & trial_type == "tt1")
#dat <- data_trimmed %>%
data_eg_1 %>%
group_by(unique_id, trial_type) %>%
summarize(mean_rt_con = mean(rt[block_type == "con"], na.rm = TRUE),
mean_rt_incon = mean(rt[block_type == "incon"], na.rm = TRUE),
sd_rt = sd(rt, na.rm = TRUE)) %>%
mutate(D = (mean_rt_incon - mean_rt_con)/sd_rt)## # A tibble: 1 × 6
## # Groups: unique_id [1]
## unique_id trial_type mean_rt_con mean_rt_incon sd_rt D
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ian death emma_43 tt1 1350. 1391. 427. 0.0948
ggplot(data_eg_1, aes(rt, block_type)) +
geom_point(alpha = 0.5, position = position_jitter(height = 0.2,
width = 0,
seed = 43)) +
mdthemes::md_theme_linedraw() +
labs(x = "Reaction time (ms)",
y = "Block type") Widths cant be directly compared between D and PI as they have different ranges, so D scores only.
Not meta analyzed as extreme skew in data means that residuals are very non-normal, violating assumptions and underestimating MAP estimates. Instead I simply present MAP estimates.
Most probable estimate among the most probable estimates
data_map_ci_widths <- data_estimates_D %>%
group_by(domain, trial_type) %>%
do(point_estimate(.$ci_width, centrality = "MAP")) %>%
ungroup()
write_csv(data_map_ci_widths, "../data/results/data_map_ci_widths_irap_d_vs_pi.csv")
data_map_ci_widths %>%
summarize(map_map = point_estimate(MAP, centrality = "MAP"),
min_map = min(MAP),
max_map = max(MAP)) %>%
unnest(map_map) %>%
rename(MAP_MAP = MAP) %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| MAP_MAP | min_map | max_map |
|---|---|---|
| 1.31 | 0.75 | 1.35 |
By domain and trial type
data_map_ci_widths %>%
pivot_wider(names_from = trial_type, values_from = MAP) %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| domain | tt1 | tt2 | tt3 | tt4 |
|---|---|---|---|---|
| Body image | 1.31 | 1.31 | 1.34 | 1.26 |
| Clinton-Trump | 1.29 | 1.31 | 1.31 | 1.31 |
| Countries (1) | 1.27 | 1.33 | 1.29 | 1.30 |
| Countries (2) | 1.32 | 1.31 | 1.31 | 1.31 |
| Death (1) | 1.29 | 1.30 | 1.15 | 1.33 |
| Death (2) | 1.29 | 1.29 | 1.30 | 1.30 |
| Death (3) | 1.25 | 1.31 | 1.29 | 1.32 |
| Disgust (1) | 1.12 | 1.13 | 1.13 | 1.13 |
| Disgust (2) | 1.28 | 1.31 | 1.30 | 1.24 |
| Friend-Enemy | 1.30 | 1.30 | 1.32 | 1.32 |
| Gender (1) | 1.32 | 1.31 | 1.32 | 1.31 |
| Gender (2) | 1.29 | 1.31 | 1.29 | 1.30 |
| Lincoln-Hitler | 0.91 | 0.92 | 0.92 | 1.30 |
| Personality - Agreeableness | 1.29 | 1.31 | 1.30 | 1.32 |
| Personality - Conscientiousness | 1.30 | 1.32 | 1.30 | 1.30 |
| Personality - Extraversion | 1.32 | 1.29 | 1.32 | 1.30 |
| Personality - Neuroticism | 1.26 | 1.32 | 1.30 | 1.29 |
| Personality - Openness | 1.31 | 1.31 | 1.34 | 1.32 |
| Race (1) | 1.29 | 1.31 | 1.30 | 1.30 |
| Race (2) | 0.75 | 0.79 | 0.79 | 0.79 |
| Religion | 1.23 | 1.30 | 1.30 | 1.30 |
| Rich-Poor | 1.31 | 1.29 | 1.31 | 1.32 |
| Sexuality (1) | 0.98 | 1.00 | 0.99 | 0.99 |
| Sexuality (2) | 1.01 | 0.90 | 0.98 | 1.01 |
| Shapes & colors (1) | 1.29 | 1.30 | 1.35 | 1.28 |
| Shapes & colors (2) | 1.05 | 1.31 | 1.29 | 1.25 |
| Shapes & colors (3) | 1.32 | 1.31 | 1.30 | 1.23 |
| Shapes & colors (4) | 1.16 | 1.32 | 1.31 | 1.35 |
| Shapes & colors (5) | 1.22 | 1.30 | 1.32 | 1.32 |
| Shapes & colors (6) | 1.29 | 1.29 | 1.30 | 1.28 |
| Shapes & colors (7) | 0.96 | 1.26 | 1.31 | 1.14 |
| Stigma - ADHD | 1.13 | 1.13 | 1.13 | 1.14 |
| Stigma - PTSD | 1.13 | 1.14 | 1.12 | 1.12 |
| Stigma - Schizophrenia | 1.11 | 1.13 | 1.13 | 1.13 |
| Valenced words | 1.21 | 1.33 | 1.32 | 1.31 |
data_ci_width_map_D <- data_estimates_D %>%
group_by(domain, trial_type) %>%
#summarize(median = median(ci_width), .groups = "drop") %>%
do(point_estimate(.$ci_width, centrality = "MAP")) %>%
ungroup() %>%
mutate(MAP = round_half_up(MAP, 3),
trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4"),
trial_type = fct_relevel(trial_type, "Trial type 1", "Trial type 2", "Trial type 3", "Trial type 4")) %>%
#mutate(domain = fct_reorder(domain, MAP, .fun = min)) %>%
#arrange(domain) %>%
mutate(domain = fct_rev(domain))
# save to disk
write_csv(data_ci_width_map_D, "../data/results/data_ci_width_map_D.csv")
# plot
p_ci_widths <-
ggplot(data_ci_width_map_D, aes(MAP, domain)) +
geom_point(position = position_dodge(width = 0.8)) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4, nrow = 1) +
labs(x = "Highest probability (MAP) 95% CI width",
y = "") +
theme(legend.position = "top")
p_ci_widthsBy domain
p_cis_by_domain <-
data_estimates_D %>%
mutate(domain = str_replace(domain, "Personality - ", "Big5: "),
domain = str_replace(domain, "Stigma - ", "Stigma: ")) %>%
arrange(estimate) %>%
group_by(domain) %>%
mutate(ordered_id = row_number()/n()) %>%
ungroup() %>%
ggplot() +
geom_linerange(aes(x = ordered_id, ymin = ci_lower, ymax = ci_upper, color = sig),
alpha = 1) +
geom_point(aes(ordered_id, estimate), size = 0.5, shape = "square") +
geom_hline(yintercept = 0, linetype = "dotted") +
mdthemes::md_theme_linedraw() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top") +
scale_color_viridis_d(end = 0.6, direction = -1) +
xlab("Ranked participant") +
ylab("*D* score") +
labs(color = "95% CI excludes zero point") +
facet_wrap(~ domain, ncol = 5)
p_cis_by_domaindata_diff_zero <-
bind_rows(mutate(data_estimates_D, scoring_method = "*D* scores"),
mutate(data_estimates_PI, scoring_method = "PI scores")) %>%
mutate(domain = as.factor(domain),
trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4"),
trial_type = fct_relevel(trial_type, "Trial type 1", "Trial type 2", "Trial type 3", "Trial type 4")) %>%
group_by(domain, trial_type, scoring_method) %>%
summarize(proportion_diff_zero = mean(sig),
variance = plotrix::std.error(sig)^2,
.groups = "drop") %>%
# model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
mutate(proportion_diff_zero_temp = case_when(proportion_diff_zero < 0.001 ~ 0.001,
proportion_diff_zero > 0.999 ~ 0.999,
TRUE ~ proportion_diff_zero),
proportion_diff_zero_logit = boot::logit(proportion_diff_zero_temp)) %>%
select(-proportion_diff_zero_temp) %>%
#filter(!(proportion_diff_zero == 0 & variance == 0)) %>%
mutate(variance = ifelse(variance == 0, 0.0001, variance))
# # save to disk
write_csv(data_diff_zero, "../data/results/data_diff_zero_irap_d_vs_pi.csv")
# data_diff_zero <- read_csv("../data/results/data_diff_zero_irap_d_vs_pi.csv")p_diff_zero <-
data_diff_zero %>%
mutate(domain = fct_rev(factor(domain))) %>%
ggplot(aes(proportion_diff_zero, domain, color = scoring_method, shape = scoring_method)) +
geom_linerangeh(aes(xmin = proportion_diff_zero - sqrt(variance)*1.96,
xmax = proportion_diff_zero + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of scores<br/>different from zero point",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_diff_zero# fit model
fit_diff_zero <-
lmer(proportion_diff_zero_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_diff_zero,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_diff_zero <-
summary(emmeans(fit_diff_zero, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_diff_zero <- fit_diff_zero %>%
merTools::REsdExtract() %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_diff_zero <- results_emm_diff_zero %>%
# as in metafor package's implementation of prediction intervals, see metafor::predict.rma.R
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2)), # [2] is variance for domain
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2))) |>
select(-se) |>
mutate_if(is.numeric, boot::inv.logit)
# plot
p_prop_nonzero <-
ggplot(results_diff_zero, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
mdthemes::md_theme_linedraw() +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
#scale_color_viridis_d(alpha = 1, begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
scale_x_discrete(labels = c("IRAP D scores", "IRAP PI scores")) +
labs(x = "",
y = "Proportion of scores<br/>different from zero point<br/>") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_prop_nonzeroresults_diff_zero %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| D scores | 0.08 | 0.05 | 0.12 | 0.01 | 0.47 |
| PI scores | 0.08 | 0.05 | 0.12 | 0.01 | 0.46 |
# tests
data_emms_diff_zero <- emmeans(fit_diff_zero, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_diff_zero)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| (D scores) - PI scores | 0.56 |
Within domain and trial type.
Many have argued that the zero point is arbitrary and not a useful reference point. Instead of asking “what proportion of D/PI scores are different from zero?”, we could also ask “what proportion of D/PI scores are different from one another?”
A common way to assess whether for differences between two estimates is to assess for non overlap between their confidence intervals. However, it has been repeatedly pointed out that this is less than ideal: there are situations where confidence intervals overlap slightly and yet the difference in means is significant.
Cornell Statistical Consulting Unit (2008) Overlapping Confidence Intervals and Statistical Significance argue this clearly. From their whitepaper:
The null hypothesis of zero mean difference is rejected when
\(|x_1 - x_2| > t \times \sqrt{SE_1^2 + SE_2^2}\)
The individual confidence intervals do not overlap when
\(|x_1 - x_2| > t \times (SE_1 + SE_2)\)
It can be shown that the following is always true:
\(\sqrt{SE_1^2 + SE_2^2} \le (SE_1 + SE_2)\)
This means that as \(|x_1 - x_2|\) increases there will be a point at which there is a significant difference between the means, but where the confidence intervals still overlaps. I.e., non overlapping confidence intervals indicate differences, but partially overlapping intervals do not exclude that there being differences.
As such, it is more appropriate and liberal to test for differences between each score and every other score (within the same domain and trial type) based on the CI of the difference scores rather than the non-overlap of intervals of the pair of scores.
# # discriminability using non-overlap of CIs
# discriminability <- function(data, i) {
# data_with_indexes <- data[i,] # boot function requires data and index
# ci_lower <- data_with_indexes$ci_lower
# ci_upper <- data_with_indexes$ci_upper
# n_ci_lower <- length(ci_lower)
# n_ci_upper <- length(ci_upper)
# r_ci_lower <- sum(rank(c(ci_lower, ci_upper))[1:n_ci_lower])
# A <- (r_ci_lower / n_ci_lower - (n_ci_lower + 1) / 2) / n_ci_upper
# return(A)
# }
# discriminatory using the significance of the difference score
# the goal here is to assess mean_diff > 1.96 * sqrt(SE1^2 + SE2^2 for every possible comparison EXCLUDING self comparisons. This is tricky to do within a typical tidyverse workflow as it means doing mutates involving each row of a column and every other row of that column but not the same row.
# the below solution is to use expand.grid to find all combinations of a row with itself, and then use the modulus of the length of the row to filter out the self-pairings. Then do mutates on the rows to assess significant differences. It's enough to then summarize the proportion of significant results across all participants.
discriminability <- function(data, i) {
data_with_indexes <- data[i,] # boot function requires data and index
grid_estimates <- expand.grid(data_with_indexes$estimate, data_with_indexes$estimate) |>
mutate(diff = Var1 - Var2,
row_number = row_number(),
modulus = row_number %% (nrow(data_with_indexes)+1)) |>
filter(modulus != 1) |>
select(diff)
grid_se <- expand.grid(data_with_indexes$se, data_with_indexes$se) |>
mutate(critical_value = 1.96 * sqrt(Var1^2 + Var2^2),
row_number = row_number(),
modulus = row_number %% (nrow(data_with_indexes)+1)) |>
filter(modulus != 1) |>
select(critical_value)
proportion_sig_diff <-
bind_cols(grid_estimates, grid_se) |>
mutate(sig = abs(diff) > critical_value) |>
summarize(proportion_sig_diff = mean(sig)) |>
pull(proportion_sig_diff)
return(proportion_sig_diff)
}
bootstrap_discriminability <- function(data){
require(dplyr)
require(boot)
fit <-
boot::boot(data = data,
statistic = discriminability,
R = 2000,
sim = "ordinary",
stype = "i",
parallel = "multicore",
ncpus = parallel::detectCores())
results <- boot::boot.ci(fit, conf = 0.95, type = "perc") # bca bootstraps throw an error, so use next best
output <-
tibble(
estimate = fit$t0,
ci_lower = results$percent[4],
ci_upper = results$percent[5]
)
return(output)
}# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/results/data_discriminability_D.csv")) {
data_discriminability_D <- read_csv("../data/results/data_discriminability_D.csv") %>%
filter(method == "bca")
} else {
# bootstrap D scores
data_discriminability_D <- data_estimates_D |>
mutate(se = (ci_upper - ci_lower)/(1.96*2)) |>
select(unique_id, method, domain, trial_type, estimate, se) |>
group_by(method, domain, trial_type) |>
do(bootstrap_discriminability(data = .)) |>
ungroup() |>
#filter(method == "bca") |>
rename(proportion_discriminable = estimate) |>
mutate(variance = (((ci_upper - ci_lower)/(1.96*2)))^2,
domain = as.factor(domain),
trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4"),
scoring_method = "*D* scores")
# save to disk
write_csv(data_discriminability_D, "../data/results/data_discriminability_D.csv")
data_discriminability_D <- data_discriminability_D %>%
filter(method == "bca")
}# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/results/data_discriminability_PI.csv")) {
data_discriminability_PI <- read_csv("../data/results/data_discriminability_PI.csv") %>%
filter(method == "bca")
} else {
# bootstrap PI scores
data_discriminability_PI <- data_estimates_PI |>
mutate(se = (ci_upper - ci_lower)/(1.96*2)) |>
select(unique_id, method, domain, trial_type, estimate, se) |>
group_by(method, domain, trial_type) |>
do(bootstrap_discriminability(data = .)) |>
ungroup() |>
#filter(method == "bca") |>
rename(proportion_discriminable = estimate) |>
mutate(variance = (((ci_upper - ci_lower)/(1.96*2)))^2,
domain = as.factor(domain),
trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4"),
scoring_method = "PI scores")
# save to disk
write_csv(data_discriminability_PI, "../data/results/data_discriminability_PI.csv")
data_discriminability_PI <- data_discriminability_PI %>%
filter(method == "bca")
}# combine
data_discriminability_combined <-
bind_rows(
mutate(data_discriminability_D, scoring_method = "*D* scores"),
mutate(data_discriminability_PI, scoring_method = "PI scores")
) %>%
mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4")) %>%
#filter(!(proportion_discriminable == 0 & variance == 0)) %>%
mutate(variance = ifelse(variance == 0, 0.0001, variance)) |>
# model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
mutate(
proportion_discriminable_temp = case_when(proportion_discriminable < 0.001 ~ 0.001,
proportion_discriminable > 0.999 ~ 0.999,
TRUE ~ proportion_discriminable),
proportion_discriminable_logit = boot::logit(proportion_discriminable_temp)
) %>%
select(-proportion_discriminable_temp)
p_discriminability <-
data_discriminability_combined %>%
mutate(domain = fct_rev(factor(domain))) %>%
ggplot(aes(proportion_discriminable, domain, color = scoring_method, shape = scoring_method)) +
geom_linerangeh(aes(xmin = proportion_discriminable - sqrt(variance)*1.96,
xmax = proportion_discriminable + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of scores<br/>differerent from other scores",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_discriminability# fit meta analytic model
fit_disciminability <-
lmer(proportion_discriminable_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_discriminability_combined,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_disciminability <-
summary(emmeans(fit_disciminability, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_disciminability <- fit_disciminability %>%
merTools::REsdExtract() %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_disciminability <- results_emm_disciminability %>%
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2)), # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2))) |>
select(-se) |>
mutate_if(is.numeric, boot::inv.logit)
# plot
p_prop_discriminable <-
ggplot(results_disciminability, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
#scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
scale_x_discrete(labels = c("IRAP D scores", "IRAP PI scores")) +
mdthemes::md_theme_linedraw() +
labs(x = "",
y = "Proportion of scores<br/>differerent from other scores<br/>") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_prop_discriminable results_disciminability %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| D scores | 0.08 | 0.06 | 0.11 | 0.02 | 0.35 |
| PI scores | 0.08 | 0.06 | 0.10 | 0.01 | 0.33 |
# tests
data_emms_disciminability <- emmeans(fit_disciminability, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_disciminability)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| (D scores) - PI scores | 0.146 |
NB observed range of confidence intervals
# max range as example
data_estimates_D %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| min | max | range |
|---|---|---|
| -1.506 | 1.711 | 3.217 |
## calculate observed ranges
observed_range_estimates_D <- data_estimates_D %>%
group_by(domain, trial_type) %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min)
observed_range_estimates_PI <- data_estimates_PI %>%
group_by(domain, trial_type) %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min)
# calculate CI / range
data_ci_width_proportions_D <- data_estimates_D %>%
# join this data into the original data
full_join(observed_range_estimates_D, by = c("domain", "trial_type")) %>%
# calculate ci width as a proportion of observed range
mutate(ci_width_proportion = ci_width / range) %>%
mutate(scoring_method = "*D* scores")
data_ci_width_proportions_PI <- data_estimates_PI %>%
# join this data into the original data
full_join(observed_range_estimates_PI, by = c("domain", "trial_type")) %>%
# calculate ci width as a proportion of observed range
mutate(ci_width_proportion = ci_width / range) %>%
mutate(scoring_method = "PI scores")
# combine
data_ci_width_proportions_combined <-
bind_rows(data_ci_width_proportions_D,
data_ci_width_proportions_PI) %>%
mutate(domain = as.factor(domain),
trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4")) %>%
group_by(scoring_method, domain, trial_type) %>%
summarize(ci_width_proportion_mean = mean(ci_width_proportion),
variance = plotrix::std.error(ci_width_proportion)^2) %>%
ungroup() %>%
# logit transform
mutate(ci_width_proportion_mean_temp = case_when(ci_width_proportion_mean < 0.0001 ~ 0.0001,
ci_width_proportion_mean > 0.9999 ~ 0.9999,
TRUE ~ ci_width_proportion_mean),
ci_width_proportion_mean_logit = boot::logit(ci_width_proportion_mean_temp)) %>%
select(-ci_width_proportion_mean_temp)
write_csv(data_ci_width_proportions_combined, "../data/results/data_ci_width_proportions_irap_d_vs_pi.csv")
#data_ci_width_proportions_combined <- read_csv("../data/results/data_ci_width_proportions_irap_d_vs_pi.csv")p_coverage <-
data_ci_width_proportions_combined %>%
mutate(domain = fct_rev(factor(domain))) %>%
mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4")) %>%
ggplot(aes(ci_width_proportion_mean, domain, color = scoring_method, shape = scoring_method)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
geom_linerangeh(aes(xmin = ci_width_proportion_mean - sqrt(variance)*1.96,
xmax = ci_width_proportion_mean + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of observed range covered<br/>by individual scores' 95% CIs",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_coverage# fit model
fit_ci_width_proportions <-
lmer(ci_width_proportion_mean_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_ci_width_proportions_combined,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_ci_width_proportions <-
summary(emmeans(fit_ci_width_proportions, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_ci_width_proportions <-
merTools::REsdExtract(fit_ci_width_proportions) %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_ci_width_proportions <- results_emm_ci_width_proportions %>%
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2)), # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2))) %>%
select(-se) %>%
mutate_if(is.numeric, boot::inv.logit)
# plot
p_ci_width_proportion_observed_range <-
ggplot(results_ci_width_proportions, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
scale_shape_discrete(labels = c("*D* scores", "PI scores")) +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Better)", "0.25", "0.50", "0.75", "1.00<br/>(Worse)")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
#scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
scale_x_discrete(labels = c("IRAP D scores", "IRAP PI scores")) +
mdthemes::md_theme_linedraw() +
labs(x = "",
y = "Proportion of observed range covered<br/>by individual scores' 95% CIs") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_ci_width_proportion_observed_rangeresults_ci_width_proportions %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| D scores | 0.51 | 0.50 | 0.53 | 0.42 | 0.61 |
| PI scores | 0.49 | 0.47 | 0.51 | 0.40 | 0.58 |
# tests
data_emms_ci_width_proportions <- emmeans(fit_ci_width_proportions, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_ci_width_proportions)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| (D scores) - PI scores | < .001 |
p_cis_by_domainggsave(filename = "plots/figure_1_cis_by_domain_irap_d.pdf",
plot = p_cis_by_domain,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 10,
height = 10,
limitsize = TRUE)Most probable CI width for D scores when bootstrapped using four different methods. Very similar results are found across methods. Overall Maximum A-Posteroi width (MAP, i.e., the mode of a continuous variable) was D = 1.31. Some domains and trial types did demonstrate smaller most probable widths.
NB I elected not to meta-analyze these widths as they demonstrate very large skew at the individual level, which violate the assumptions of linear meta-analysis and underestimate the typical width (ie estimated mean widths << MAP observed widths). Rather than meta analyze, I simply report the domain and trial type level MAP values. More informative and valid analyses are presented below - ones which can directly compare the D and PI as an alternative. That could not be accomplished with a direct comparison with D/PI scores’ 95% CIs as they are on different scales and follow different distributions.
p_ci_widthsggsave(filename = "plots/supplementary_figure_1S_ci_widths_irap_d.pdf",
plot = p_ci_widths,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 8,
height = 6,
limitsize = TRUE)p_diff_zeroggsave(filename = "plots/figure_2_proportion_excluding_zero_point_irap.pdf",
plot = p_diff_zero,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 8,
height = 7,
limitsize = TRUE)p_discriminabilityggsave(filename = "plots/figure_3_proportion_discriminable_irap.pdf",
plot = p_discriminability,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 8,
height = 7,
limitsize = TRUE)p_coverageggsave(filename = "plots/figure_4_proportion_coverage_irap.pdf",
plot = p_coverage,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 8,
height = 7,
limitsize = TRUE)The results of three hierarchical/meta analytic models are presented below, all of which provide information via different methods regarding how informative an individual participants’ D (or PI) score is in terms of being able to state that they demonstrated evidence of a bias/IRAP effect/implicit attitude, whether that individual can be discriminated from other individuals in the same domain using the same trial type, and how much of the range of observed scores an individuals score’s CI spans.
All meta analyses compare D and PI scores to assess whether the results are dependent on the D algorithm which has been argued to be suboptimal. That is, I assess whether this issue can be trivially resolved by scoring the data a different way.
Note that the theoretical max possible range of D scores is -2 to +2, but such extreme scores are practically impossible. As such, in order to understand the CI width in terms of realistic data - and also in order to compare D and PI which are on different scales and distributions - I standardize CI widths by the observed range of scores for each domain and trial type.
p_combined <-
p_prop_nonzero +
p_prop_discriminable +
p_ci_width_proportion_observed_range +
plot_layout(ncol = 1) #, guides = "collect") & theme(legend.position = "top")
p_combinedggsave(filename = "plots/figure_5_metaanalyses_irap_d_vs_irap_pi.pdf",
plot = p_combined,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 5,
height = 5,
limitsize = TRUE)sessionInfo()## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] janitor_2.1.0 ggstance_0.3.5 emmeans_1.7.5 sjPlot_2.8.10
## [5] lme4_1.1-30 Matrix_1.4-1 mdthemes_0.1.0 patchwork_1.1.1
## [9] bayestestR_0.12.1 boot_1.3-28 kableExtra_1.3.4 knitr_1.39
## [13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4
## [17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6
## [21] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] TH.data_1.1-1 googledrive_2.0.0 minqa_1.2.4
## [4] colorspace_2.0-3 ellipsis_0.3.2 sjlabelled_1.2.0
## [7] estimability_1.4 snakecase_0.11.0 markdown_1.1
## [10] parameters_0.18.1 fs_1.5.2 gridtext_0.1.4
## [13] ggtext_0.1.1 rstudioapi_0.13 listenv_0.8.0
## [16] furrr_0.3.0 farver_2.1.1 bit64_4.0.5
## [19] fansi_1.0.3 mvtnorm_1.1-3 lubridate_1.8.0
## [22] xml2_1.3.3 codetools_0.2-18 splines_4.2.1
## [25] cachem_1.0.6 sjmisc_2.8.9 jsonlite_1.8.0
## [28] nloptr_2.0.3 ggeffects_1.1.2 pbkrtest_0.5.1
## [31] broom_1.0.0 dbplyr_2.2.1 broom.mixed_0.2.9.4
## [34] shiny_1.7.2 effectsize_0.7.0 compiler_4.2.1
## [37] httr_1.4.3 sjstats_0.18.1 backports_1.4.1
## [40] assertthat_0.2.1 fastmap_1.1.0 gargle_1.2.0
## [43] cli_3.3.0 later_1.3.0 htmltools_0.5.3
## [46] tools_4.2.1 coda_0.19-4 gtable_0.3.0
## [49] glue_1.6.2 merTools_0.5.2 Rcpp_1.0.9
## [52] cellranger_1.1.0 jquerylib_0.1.4 vctrs_0.4.1
## [55] svglite_2.1.0 nlme_3.1-157 iterators_1.0.14
## [58] insight_0.18.0 xfun_0.31 globals_0.15.1
## [61] rvest_1.0.2 mime_0.12 lifecycle_1.0.1
## [64] googlesheets4_1.0.0 future_1.27.0 MASS_7.3-57
## [67] zoo_1.8-10 scales_1.2.0 vroom_1.5.7
## [70] promises_1.2.0.1 hms_1.1.1 sandwich_3.0-2
## [73] yaml_2.3.5 sass_0.4.2 stringi_1.7.8
## [76] highr_0.9 foreach_1.5.2 plotrix_3.8-2
## [79] blme_1.0-5 rlang_1.0.4 pkgconfig_2.0.3
## [82] systemfonts_1.0.4 arm_1.12-2 evaluate_0.15
## [85] lattice_0.20-45 labeling_0.4.2 bit_4.0.4
## [88] tidyselect_1.1.2 parallelly_1.32.1 magrittr_2.0.3
## [91] R6_2.5.1 generics_0.1.3 multcomp_1.4-20
## [94] DBI_1.1.3 pillar_1.8.0 haven_2.5.0
## [97] withr_2.5.0 abind_1.4-5 survival_3.3-1
## [100] datawizard_0.4.1 performance_0.9.1 modelr_0.1.8
## [103] crayon_1.5.1 utf8_1.2.2 tzdb_0.3.0
## [106] rmarkdown_2.14 grid_4.2.1 readxl_1.4.0
## [109] reprex_2.0.1 digest_0.6.29 webshot_0.5.3
## [112] xtable_1.8-4 httpuv_1.6.5 munsell_0.5.0
## [115] viridisLite_0.4.0 bslib_0.4.0